###############################################################################
#                     Replication files for                                   #
#     "America's Electorate is Increasingly Polarized Along                   #
#   Partisan Lines About Voting by Mail During the COVID-19 Crisis"           #
#                                                                             #
#           Mackenzie Lockhart, Seth Hill, Jennifer Merolla,                  #
#                     Mindy Romero, Thad Kousser                              #
#                                                                             #
#                                                                             #
#                       Prepared by Mac Lockhart                              #
#                           Aug 18, 2020                                      #
###############################################################################


rm(list=ls())
library(tidyverse)
library(DeclareDesign)
library(survey)
library(srvyr)
library(stargazer)
library(ggpubr)
#setwd()  #set location to data's location

# Load the cleaned survey data. The path is relative, so the working directory
# must be the folder that contains the CSV.
df <- read_csv("VBM_polarization.csv")

# Recode the raw survey items into the analysis variables used by the models
# below. Columns are referenced by bare name (data masking) rather than as
# `df$col`, so every expression is evaluated against the data that is actually
# flowing through the pipe.
# NOTE(review): values not listed in a recode() call fall back to recode()'s
# default handling -- confirm that this is the intended treatment of any
# other response codes in the raw data.
df <- df %>%
  mutate(
    # Party ID codes 1-3 -> Democrat, 4 -> Independent, 5-7 -> Republican.
    PID_3point = recode(
      PID,
      `1` = "Democrat",
      `2` = "Democrat",
      `3` = "Democrat",
      `4` = "Independent",
      `5` = "Republican",
      `6` = "Republican",
      `7` = "Republican"
    ),
    # Numeric treatment indicator: either information treatment counts as 1.
    any_treat = recode(
      FL_16_DO,
      "Control" = 0,
      "InformationTreatment1" = 1,
      "InformationTreatment2" = 1
    ),
    # Same indicator with text labels, used as the x-axis in the figures.
    any_treat_2 = recode(
      FL_16_DO,
      "Control" = "Control",
      "InformationTreatment1" = "Treated",
      "InformationTreatment2" = "Treated"
    ),
    # Q_17 answered "Yes" -> 1, "No" -> 0.
    auto_by_mail = recode(Q_17, "Yes" = 1, "No" = 0),
    # Q_7: the mail option is coded 1, both in-person options are coded 0.
    # The column name's spelling is kept because the models below use it.
    preffered_vote = recode(
      Q_7,
      "By voting at a traditional polling place on Election Day" = 0,
      "By voting at a professionally staffed county elections office, either on Election Day or in early voting in the week or so before Election Day" = 0,
      "By mailing in your ballot, after that ballot was sent to you a month before Election Day" = 1
    ),
    # Wave dummy: 1 for wave 2, 0 otherwise.
    period = ifelse(wave == 2, 1, 0)
  )

### Reproduce Table 1
# Weighted linear models on the pooled data: each outcome is regressed on the
# treatment indicator and the wave dummy, both interacted with party.
# The formulas are written inline so the call stored in each model is the same
# as before.
m1 <- lm(
  auto_by_mail ~ any_treat * PID_3point + period * PID_3point,
  data = df,
  weights = the.wts
)
m2 <- lm(
  preffered_vote ~ any_treat * PID_3point + period * PID_3point,
  data = df,
  weights = the.wts
)

# Patterns selecting which coefficients appear in the table.
table1_keep <- c(
  "any_treat",
  "PID_3pointRepublican:any_treat",
  "PID_3pointIndependent:any_treat",
  "PID_3pointRepublican",
  "PID_3pointIndependent",
  "period",
  "Constant"
)

# Row labels, listed in the order produced by `order` below.
table1_labels <- c(
  "Treatment",
  "Independent",
  "Republican",
  "Treatment X Independent",
  "Treatment X Republican",
  "Wave",
  "Wave X Independent",
  "Wave X Republican",
  "Constant"
)

# Preferred-method model (m2) is the first column, automatic-ballot model (m1)
# the second; the table is written to parties_multiwave.tex.
# NOTE(review): the star cutoffs of 0.1 and 0.02 presumably correspond to
# one-tailed levels of 0.05 and 0.01, matching the table note -- confirm.
stargazer(
  m2, m1,
  out = "parties_multiwave.tex",
  keep = table1_keep,
  order = c(1, 2, 3, 5, 6, 4, 7, 8, 9),
  covariate.labels = table1_labels,
  dep.var.labels = c("Preferred Method", "Automatic Ballot"),
  star.cutoffs = c(0.1, 0.02),
  notes = "P-values based on one-tailed tests",
  notes.align = "l"
)

## Reproduce Figure 1

# Build one panel of Figure 1 from a fitted party-by-treatment model.
#
# Args:
#   model:   an lm fit whose predictors are `any_treat_2` and `PID_3point`.
#   title:   panel title (the survey question wording).
#   y_label: label for the y axis.
#
# Returns:
#   A ggplot object showing, for every party x condition cell, the model's
#   fitted value with its confidence interval, one line per party.
plot_party_panel <- function(model, title, y_label) {
  # One row per party x condition cell. The grid lives only inside this
  # function, so no global vectors named like the data columns (`PID_3point`,
  # `any_treat_2`, `the.wts`) are left behind to shadow them in later lm()
  # calls. The constant weight column is retained from the original grid.
  cells <- data.frame(
    PID_3point = rep(c("Democrat", "Republican", "Independent"), each = 2),
    any_treat_2 = rep(c("Control", "Treated"), times = 3),
    the.wts = 1
  )
  # predict() returns the columns fit / lwr / upr used by the aesthetics below.
  estimates <- cbind(cells, predict(model, cells, interval = "confidence"))

  ggplot(
    estimates,
    aes(any_treat_2, fit, group = PID_3point, ymax = upr, ymin = lwr)
  ) +
    geom_point() +
    geom_line(aes(linetype = PID_3point)) +
    geom_errorbar(width = 0.05) +
    labs(title = title, y = y_label, x = "Condition") +
    scale_linetype_discrete(name = "Party") +
    ylim(0, 1)
}

# Question wording shared by the panels of both waves.
legislation_title <- "For the upcoming November 3 election, would you support national legislation directing \n all states to send a vote by mail ballot to any voter who requests one?"
preference_question <- "In the upcoming November 3 election, if you had the ability to cast a ballot in any way you wished, \n what would be your most preferred way to cast a ballot?"

## Graphs from wave 1 (left column)
df1 <- subset(df, df$period == 0)
# Models
m1 <- lm(auto_by_mail ~ any_treat_2 * PID_3point, df1, weights = the.wts)
m2 <- lm(preffered_vote ~ any_treat_2 * PID_3point, df1, weights = the.wts)
# National legislation
g1.1 <- plot_party_panel(m1, legislation_title, "Proportion Supporting")
# Personal preference (this panel also carries the survey date heading)
g2.1 <- plot_party_panel(
  m2,
  paste0("April 2020 Survey \n \n ", preference_question),
  "Proportion Vote-By-Mail"
)

## Graphs from wave 2 (right column)
df2 <- subset(df, df$period == 1)
# Models
m1 <- lm(auto_by_mail ~ any_treat_2 * PID_3point, df2, weights = the.wts)
m2 <- lm(preffered_vote ~ any_treat_2 * PID_3point, df2, weights = the.wts)
# National legislation
g1.2 <- plot_party_panel(m1, legislation_title, "Proportion Supporting")
# Personal preference (this panel also carries the survey date heading)
g2.2 <- plot_party_panel(
  m2,
  paste0("June 2020 Survey \n \n ", preference_question),
  "Proportion Vote-By-Mail"
)

# Arrange graphs: personal-preference panels on the top row, legislation
# panels on the bottom row; wave 1 on the left, wave 2 on the right.
party_effects <- ggarrange(g2.1, g2.2, g1.1, g1.2, nrow = 2, ncol = 2)
ggsave("party_effects_time.pdf", party_effects, height = 10, width = 18, units = "in")

